library(TDA)
library(tdaTS) #our package
library(tibble)
library(plotly) #3D plotting
library(fields) #image.plot function

1 Objective

We propose to compare the persistence diagram or the barcodes of data generated with the same topology.

We need to find a distance between barcodes close to zero even when the barcodes look different (after some kind a rescaling…)

2 Detecting a hole (H1)

myplots <- function(data, complex = "alpha")
{
  if(complex == "alpha")
  {
      DiagAlphaCmplx <- alphaComplexDiag(X = data,
                                     library = c("GUDHI", "Dionysus"),
                                     location = TRUE)
  }
  if(complex == "rips")
  {
      DiagAlphaCmplx <- ripsDiag(X = data,
                                maxdimension = 1,
                                maxscale = 1,
                                dist = "euclidean",
                                library = c("GUDHI", "Dionysus"),
                                location = TRUE)
  }

  plot(DiagAlphaCmplx$diagram, barcode = TRUE)
  plot(data, col = 1,xaxt="n", yaxt="n",xlab="", ylab="", asp = 1)
  one <- which(DiagAlphaCmplx[["diagram"]][, 1] == 1)
  for (i in seq(along = one))
  {
  for (j in seq_len(dim(DiagAlphaCmplx[["cycleLocation"]][[one[i]]])[1]))
    {lines(DiagAlphaCmplx[["cycleLocation"]][[one[i]]][j, , ] + rnorm(n = 4,sd = 0.0),cex = 1, col = i + 1)}
  }
  return(DiagAlphaCmplx$diagram)
}

2.1 Square and Square with a hole

pointSquareHole <- function(n, Hole_Relative_length = 0)
{
  len <- sqrt(Hole_Relative_length)
  a <- (1-Hole_Relative_length)/2
  b <- (1+Hole_Relative_length)/2
  mat <- matrix(NA,nrow = 0, ncol = 2)
  while(nrow(mat) < n)
  {
    u <- runif(2)
    if(!(u[1] > a &  u[1] < b  & u[2] > a & u[2] < b)){mat <- rbind(mat, u)}
    
  }
  res <- mat %>%
    as_tibble(.name_repair = "minimal") %>%
    setNames(c("x","y"))
  return(res)
}

2.2 Different hole size

data <- pointSquareHole(5000, 0.075)
pl <- myplots(data)

data <- pointSquareHole(5000, 0.15)
pl <- myplots(data)

data <- pointSquareHole(5000, 0.3)
pl <- myplots(data)

CONCLUSION 1: We need a distance between barcodes, which doesn’t take into account the death time of the hole (the most persistent elements).

2.3 Different sampling with a hole

data <- pointSquareHole(50, 0.3)
pl <- myplots(data)

data <- pointSquareHole(500, 0.3)
pl <- myplots(data)

data <- pointSquareHole(5000, 0.3)
pl <- myplots(data)

We need a distance between barcodes, which doesn’t detect differences bertween the 3 barcodes.

CONCLUSION 2: less point = less information. The distance has to take the number of points into account

pointSquareHole(50, 0.3) contains a hole but we barely can see it.

2.4 Different sampling when no hole

data <- pointSquareHole(100, 0)
pl <- myplots(data)

data <- pointSquareHole(300, 0)
pl <- myplots(data)

data <- pointSquareHole(1000, 0)
pl <- myplots(data)

CONCLUSION 3: we see a kind of limit distribution for H1 elements…

3 Closing or not closing (a circle)

pointCircleGap <- function(n, gap)
{
  gap <- 2 * pi * gap
  t <- runif(n, min = 0, max = 2 * pi - gap)
  x <- cos(t)
  y <- sin(t)
  res <- matrix(c(x,y), nrow = n, byrow = FALSE) %>%
    as_tibble(.name_repair = "minimal") %>%
    setNames(c("x","y"))
  return(res)
}
data <- pointCircleGap(100, 0)
pl <- myplots(data)

data <- pointCircleGap(100, 0.05)
pl <- myplots(data)

data <- pointCircleGap(100, 0.1)
pl <- myplots(data)

data <- pointCircleGap(100, 0.3)
pl <- myplots(data)

The same with additional Gaussian noise.

data<- pointCircleGap(100, 0)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)

data <- pointCircleGap(100, 0.1)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)

data <- pointCircleGap(100, 0.2)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)

data <- pointCircleGap(100, 0.3)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)

If no gap, the H1 main element appears at the same time a the last fusion in H0 elements.

The bigger the gap, the bigger the delay in H1.

CONCLUSION 4: we need a distance that is not sensitive to the gap length between fusion of all H0 and appearence of H1. It has to be only sensitive to the presence/absence of a gap.

4 Comparing different complexes

data <- pointSquareHole(100,0)
pl <- myplots(data, complex = "alpha")

pl <- myplots(data, complex = "rips")

5 Comparing different noise

Gaussian versus Uniform noise

data<- pointSquareHole(500, 0)
pl <- myplots(data, complex = "alpha")

data<- data.frame(matrix(0, nrow = 500, ncol = 2))
colnames(data) <- c("x", "y")
data$x <- data$x + rnorm(n= 500, mean = 0, sd = 10)
data$y <- data$y + rnorm(n = 500, mean = 0, sd = 10)
pl <- myplots(data, complex = "alpha")

Alpha diagrams (Rips doesn’t work…)

data<- pointSquareHole(10000, 0)
diag1 <- myplots(data)

data<- data.frame(matrix(0, nrow = 10000, ncol = 2))
colnames(data) <- c("x", "y")
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 10)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 10)
diag2 <- myplots(data)

sel <- diag1[,1] == 1
res1 <- diag1[sel,3]/diag1[sel,2]
sel <- diag2[,1] == 1
res2 <- diag2[sel,3]/diag2[sel,2]

Gumbel distribution?

hist(log(log(res1)) - mean(log(log(res1))), breaks = 200)

hist(log(log(res2))- mean(log(log(res1))), breaks = 200)

Max values

max(res1)
## [1] 9.439811
max(res2)
## [1] 7.666121

The Square with a hole?

data <- pointSquareHole(10000, 0.3)
diag3 <- myplots(data)

sel <- diag3[,1] == 1
res3 <- diag3[sel,3]/diag3[sel,2]
hist(log(log(res3)) - mean(log(log(res3))), breaks = 200)

Max value

max(res3)
## [1] 584.8804

The circle closed + small noise

data <- pointCircleGap(10000, 0)
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 0.01)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 0.01)
diag3 <- myplots(data)

sel <- diag3[,1] == 1
res3 <- diag3[sel,3]/diag3[sel,2]
hist(log(log(res3)) - mean(log(log(res3))), breaks = 200)

Max value

max(res3)
## [1] 56973.07

The circle not closed + small noise

data <- pointCircleGap(10000, 0.03)
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 0.01)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 0.01)
diag3 <- myplots(data)

sel <- diag3[,1] == 1
res3 <- diag3[sel,3]/diag3[sel,2]
hist(log(log(res3)) - mean(log(log(res3))), breaks = 200)

Max value

max(res3)
## [1] 154.1639

6 Conclusions

We need a distance between barcodes, which doesn’t take into account the death time of the hole (the most persistent elements)

The distance should be focused on what’s happening just after H0 fusions: presence or absence of a gap between main fusions and appearence og H1 elements.

less point = less information. The distance has to take the quantiy of information into account